# Gene annotation table. Rows containing any NA are dropped so that every
# remaining gene has complete values in all columns.
gene_tss <- na.omit(read.csv("~/data/feature_file/gene_master.csv"))

# Sample identifiers. Each one is the stem of a BAM file
# ("~/data/bam_bai/<name>.bam"); the order here defines the positional index
# used for data_list everywhere else in this script.
file_name <- c("wt_pulse", "wt_c10", "wt_c15", "wt_c20", "wt_c40",
               "cac_pulse", "cac_c10", "cac_c15", "cac_c20", "cac_c40")

# One (initially NULL) accumulator slot per sample. The names are derived
# from file_name so that data_list[[f]] and data_list[[file_name[f]]] always
# refer to the same sample. The previous hand-typed names were listed cac-first
# (while the slots are filled wt-first) and contained "cac_c15" twice with no
# "wt_c15", so by-name access to the saved list returned the wrong sample.
data_list <- setNames(vector("list", length(file_name)), file_name)

# Collect, for every gene and every sample, the positions of fragment
# midpoints relative to the gene's TSS, oriented so that positive values are
# downstream of the TSS on the gene's own strand.
#
# Needs GRanges/IRanges/mid and scanBam/ScanBamParam to be available
# (presumably GenomicRanges and Rsamtools are attached before this point --
# confirm).

# BAM and index paths do not depend on the gene, so build them once.
bam_paths <- paste0("~/data/bam_bai/", file_name, ".bam")
bai_paths <- paste0("~/data/bam_bai/", file_name, ".bam.bai")

n_genes <- nrow(gene_tss)

# offset_chunks[[f]][[i]] holds the offsets of gene i in sample f. Filling a
# preallocated list and flattening once at the end avoids re-copying an
# ever-growing vector on every gene.
offset_chunks <- lapply(seq_along(file_name),
                        function(f) vector("list", n_genes))

for (i in seq_len(n_genes)) {
  tss <- gene_tss$tss[i]
  on_plus <- gene_tss$strand[i] == "+"

  # Window is 500 bp upstream to 1000 bp downstream of the TSS in gene
  # orientation; any strand other than "+" is treated as the minus strand.
  if (on_plus) {
    window_start <- tss - 500
    window_end <- tss + 1000
  } else {
    window_start <- tss - 1000
    window_end <- tss + 500
  }

  chr.gr <- GRanges(seqnames = gene_tss$chr[i],
                    ranges = IRanges(start = window_start, end = window_end))
  # Only pos and isize are used below, so only those fields are requested.
  p <- ScanBamParam(what = c("pos", "isize"), which = chr.gr)

  for (f in seq_along(file_name)) {
    # scanBam returns one list element per queried range; there is exactly one.
    reads <- scanBam(file = bam_paths[f],
                     index = bai_paths[f],
                     param = p)[[1]]

    # Keep fragments strictly between 140 and 180 bp (presumably
    # mono-nucleosome sized -- confirm). Filtering on isize *before* building
    # the ranges means records with a negative or NA insert size (e.g. the
    # second mate of a pair) are dropped instead of making IRanges() fail.
    keep <- which(reads$isize > 140 & reads$isize < 180)
    fragments <- IRanges(start = reads$pos[keep], width = reads$isize[keep])
    midpoints <- mid(fragments)

    # Flip the sign on the minus strand so downstream is always positive.
    offsets <- if (on_plus) midpoints - tss else tss - midpoints

    # list() wrapper keeps the slot even if offsets is empty.
    offset_chunks[[f]][i] <- list(offsets)
  }

  if (i %% 500 == 0) {
    cat(paste0("gene # ", i, "\n"))
  }
}

# Flatten per-gene chunks (gene order preserved) onto whatever each slot of
# data_list already held. Assigning via data_list[f] <- list(...) keeps the
# slot and its name even when the result is NULL.
for (f in seq_along(file_name)) {
  data_list[f] <- list(c(data_list[[f]],
                         unlist(offset_chunks[[f]], use.names = FALSE)))
}

saveRDS(data_list, file = "~/data/tss_aggregate.RDS")

# Density of fragment-midpoint offsets around the TSS for four samples.
# data_list is indexed by position: 1 and 5 are drawn in the WT colour,
# 6 and 10 in the cac1 colour; 5/10 are solid ("mature" in the legend) and
# 1/6 are dashed ("nascent" in the legend).
cl <- viridis(100, alpha = 1, begin = 0, end = 1, option = "D")
wt_col <- cl[55]
cac_col <- "orange"

# NOTE(review): this is an absolute path ("/data/...") while every other path
# in the script is under "~/data/..." -- confirm this is intended.
file.name <- "/data/figure/supple9_aggregate_line.png"
png(
  file = file.name,
  width = 5, height = 4, units = "in", res = 200,
  bg = "white", type = "cairo-png"
)
par(mar = c(4, 4, 4, 2), cex = 0.8)

# The first curve also sets up the axes; x is clipped to the TSS window.
plot(
  density(data_list[[5]], bw = 30),
  xlim = c(-500, 1000), xaxs = "i",
  col = wt_col, main = "", lwd = 2,
  xlab = "Distance from TSS (bp)"
)

# Remaining curves, drawn in the same order as before.
overlay_index <- c(10, 1, 6)
overlay_col <- c(cac_col, wt_col, cac_col)
overlay_lty <- c(1, 2, 2)
for (k in seq_along(overlay_index)) {
  lines(
    density(data_list[[overlay_index[k]]], bw = 30),
    col = overlay_col[k], lwd = 2, lty = overlay_lty[k]
  )
}

legend_labels <- c(
  "WT, nascent",
  expression(paste(italic(cac1), Delta, ', nascent')),
  "WT, mature",
  expression(paste(italic(cac1), Delta, ', mature'))
)
legend(
  "topright", legend_labels,
  col = c(wt_col, cac_col),  # recycled across the four entries: WT, cac1, WT, cac1
  lty = c(2, 2, 1, 1),
  lwd = c(2, 2, 2, 2),
  box.lwd = 0, box.col = "transparent", bg = "transparent"
)
dev.off()






